rm(list = ls())
#This to obtain:
#table_3.tex
library(stargazer)
library(ggplot2)
library(dplyr)
library(lemon)
library(lfe)
library(broom)
library(tidyr)
library(lemon)
library(ggplot2)
library(grid)
library(raster)
library(lmtest)
library(multiwayvcov)
library("ggplot2")
library("glue")
library("spdep")
library('dismo')
library(spmoran)
library(stringr)
library("readxl")


######################################
#Function for cleaning the list of FE#
######################################
clean_list <- function(old_list, particle) {
  term <- paste(glue("^{particle}"))
  new_list = list() 
  for (i in old_list) {
    #print(i)
    temp_list = c()
    
    for (j in i){
      if (!grepl(term, j)){
        temp_list <- c(temp_list, j)
      }
    }
    l = length(new_list)
    new_list[[l+1]] <- temp_list
  }
  return(new_list)}


#Functions for coefficient plots
rerun_regression<-function(regression_name, string_dv, orig_data_for_clusters, cluster_name){
  #Process:
  #-takes already existing regression objects, associates them with an 
  #"official" label, calculates robust clustered SE and CI based on a cluster
  #that is in the original dataset
  #and puts the results in a tibble
  
  #Input:
  #-Regression model object after lm
  #-The label for the DV that you want displayed in ggplot
  #-original dataset that contains thhe names of the clusters
  
  #Output:
  #-A tibble with regression coefficients from the model that you ran with:
  #--robust SE
  #--robust CI
  
  #Recreate dataframe from the regression object
  remake_data<-regression_name$model
  #Scale the DV
  remake_data[1]<-scale(remake_data[1])
  #Take the string of the DV and IV
  dv<-regression_name$terms[[2]]
  ivs<-regression_name$terms[[3]]
  ivs_str<-paste(ivs)[2]
  dv_str<-paste(dv, "~", collapse = "")
  dv_ivs_str<-paste(dv_str, ivs_str, sep=" ")
  #Create specification and run model
  specification = as.formula(dv_ivs_str)
  model <- lm(specification, data = remake_data)
  no_obs<-nobs(model)
  no_obs<-paste("Obs: ", as.character(no_obs), sep = "")
  #Calculate robust SE
  model_se<-se_robust_cluster(model, orig_data_for_clusters, cluster_name)
  #Create tibble
  model_tidy<-tidy(model)
  model_tidy$no_obs<-no_obs
  model_se_tidy<-tidy(model_se)
  names(model_se_tidy)[1] <- "term"
  names(model_se_tidy)[2] <- "robust.se"
  model_tidy2<-left_join(model_tidy, model_se_tidy, by = c("term"="term"))
  #Calculate CI
  df_ci<-as.data.frame(ci_robust_cluster(model, orig_data_for_clusters, cluster_name))
  df_ci <- cbind(term = rownames(df_ci), df_ci)
  names(df_ci)[2] <- "lb_95"
  names(df_ci)[3] <- "ub_95"
  
  #Merge CI to the dataframe
  model_tidy3<-left_join(model_tidy2, df_ci, by = c("term"="term"))
  
  #Only keep the treat variable
  model_tidy4<-subset(model_tidy3, term=="treat")
  model_tidy4$dv_official<-string_dv
  
  return(model_tidy4)
}


return_graphs_coef<-function(models_run, models_run_alias, orig_data_for_clusters, cluster_name){
  #Process:
  #-takes list of regression objects, associates them with a 
  #list of "official" labels - models_run_alias, 
  #create cluster robust SE with name clusters taken from the original dataset - orig_data_for_clusters
  #keep the "treat" variable and
  #produce ggplots with those
  
  #Input:
  #-List of Regression model object after lm
  #-List of label for the DV that you want displayed in ggplot
  #-Original dataset name from which te function takes the cluster names
  
  #Output:
  #-A ggplot with regression coefficients and robust CI
  
  names(models_run)<-models_run_alias
  df<-list()
  length(df)<-length(models_run)
  counter = 1
  for (i in 1:length(models_run)){
    #print(i)
    x<-models_run[[i]]
    name_x<-names(models_run)[i]
    #print(name_x)
    model_tidy<-rerun_regression(x, name_x, orig_data_for_clusters, cluster_name)
    model_tidy$counter<-counter
    #print(model_tidy)
    df[[i]] = model_tidy
    counter = counter +1
    print(paste("Rerun regression", counter, sep=" "))}
  print(df)
  big_data = do.call(rbind, df)
  irislabs1 <- rev(big_data$dv_official)
  irislabs2 <- rev(big_data$no_obs)
  #return(big_data)
  mean_feature <- ggplot(data = big_data, aes(x=estimate, y=as.numeric(reorder(dv_official, desc(counter))))) +
    geom_point(size = 2) +
    geom_errorbar(aes(xmin = lb_95, xmax = ub_95), size = 0.9, width = 0)+
    scale_y_continuous(name = "",
                       breaks = 1:length(irislabs1),
                       labels = irislabs1,
                       sec.axis = sec_axis(~.,
                                           breaks = 1:length(irislabs2),
                                           labels = irislabs2))+
    geom_vline(xintercept = 0) +
    theme_bw() +
    scale_x_continuous(name = "Estimate") +
    theme(plot.title = element_text(hjust = 0.5),
          axis.text.x = element_text(size=14),
          axis.text.y = element_text(size=14),
          axis.title=element_text(size=16))  
  #return(big_data)
  return(mean_feature)
}


clean_col_labels<-function(column_labels){
  #Process:
  #-takes list of column labels for stargazer
  #and removes the unnecessary tags
  
  #Input:
  #-list of column labels for stargazer
  
  #Output:
  #-retruns list of clean labels
  df<-list()
  length(df)<-length(column_labels)
  counter = 1
  for (i in column_labels){
    #print(i)
    j<-str_remove(i, '\\{')
    k<-str_remove(j, '\\}')
    l<-str_remove(k, '\\\\')
    m<-str_remove(l, 'shortstack')
    n<-gsub("\\\\", "\n", m)
    o<-gsub("\n\n", "\n", n)
    p<-gsub(",", "", o)
    df[counter] = p
    counter = counter +1
    df<-unlist(df)
  }
  return(df)
}

se_robust_cluster <- function(x, data_sample_shape, cluster_name){
  data_model<-x$model
  data_model$in_regression<-1
  data_model<-subset(data_model, select=c(in_regression))
  #Merge by rowname or index
  data_subset <- merge(data_sample_shape, data_model, by=0)
  #Remove any missing
  data_subset<-subset(data_subset, !is.na(in_regression))
  dat_clusters<-data_subset[cluster_name]
  res<-coeftest(x, vcov = cluster.vcov(x, dat_clusters, force_posdef=T))[, 2]
  res_ci<-coefci(x, vcov = cluster.vcov(x, dat_clusters, force_posdef=T))
  return(res)}

ci_robust_cluster <- function(x, data_sample_shape, cluster_name){
  data_model<-x$model
  data_model$in_regression<-1
  data_model<-subset(data_model, select=c(in_regression))
  #Merge by rowname or index
  data_subset <- merge(data_sample_shape, data_model, by=0)
  #Remove any missing
  data_subset<-subset(data_subset, !is.na(in_regression))
  dat_clusters<-data_subset[cluster_name]
  res<-coeftest(x, vcov = cluster.vcov(x, dat_clusters, force_posdef=T))[, 2]
  res_ci<-coefci(x, vcov = cluster.vcov(x, dat_clusters, force_posdef=T))
  return(res_ci)}



########################################################################
#Function for returning results that include controls for SP. Autocorr.#
########################################################################

spatial_regression_meigen <- function(data_sample, y, x, unique_observation_id) {
  clean_x = clean_list(x, "quad")
  clean_x = unlist(clean_list(clean_x, "bfe"))
  
  
  specification = as.formula(paste(glue("{y}~"), 
                                   paste(clean_x, collapse = "+")))
  bw<-subset(data_sample, select =c(x, y, unique_observation_id))
  bw<-bw[!sf::st_is_empty(bw), ]
  bw<-na.omit(bw)
  #Step1: Extracting coordinates
  coords<- subset(bw, select = c("POINT_X","POINT_Y"))
  #Step2: Moran eigenvalues and eigenvectors
  coords<-st_drop_geometry(coords)
  #print(coords)
  meig <- meigen(coords=coords)
  #print(meig)
  print("Done Spatial Filtering")
  
  #Step3: Adding the eigenvectors back to the dataframes
  bw_fitted<-cbind(bw, as.data.frame(meig$sf),
                   id = row.names(bw))
  #Keeping track of indices
  ##This will allow us to see exactly which observations were used in the regressions
  row.names(bw_fitted) <- bw_fitted$id
  bw_fitted<-subset(bw_fitted, select = -c(id))
  #Step4: Extracting the names of all eigenvectors
  list_fitted_ei<-names(bw_fitted[, grepl("V", names(bw_fitted))])
  list_fitted_ei<-list_fitted_ei[list_fitted_ei != "Shape"]
  #Step5: Appending the eigenvectors back to all IVs
  all_vars<-append(unlist(x), list_fitted_ei)
  specification_with_eigenv = as.formula(paste(glue("{y}~"), 
                                               paste(all_vars, collapse = "+")))
  model1 <- lm(specification_with_eigenv, data = bw_fitted)
  #Step6: Calculate Moran's I
  y<-bw[[y]]
  x <- subset(bw, select = clean_x)
  x<-st_drop_geometry(x)
  res <- resf(y=y, x=x, meig=meig)
  #These is the Moran'I value
  moran_i<-round(res$s[[2,1]], 3)
  random_se<-round(res$s[[1,1]], 3)
  return(list(model1, moran_i, random_se))
  }


#Step 1: Read Data
#setwd("C:/Users/bogdanp/")
setwd("/Users/bgpopescu/")
#setwd("/Users/bogdan.popescu/")

census_1895_dist <- st_read(dsn="./Dropbox/Legacies_Project/Analysis/data/data.gdb",
                            layer="census_1895_points")
census_1895 <- subset(census_1895_dist, inside_croatia == 1)
census_1895 <- subset(census_1895, !is.na(lat) & !is.na(treat))

#Step 2: Perform calculation
census_1895$dist1 <- as.numeric( with (census_1895,ifelse(census_1895$treat==1, 1, -1)))
census_1895$krajna6_distance<-census_1895$krajna6_distance/1000
census_1895$zagreb_distance<-census_1895$zagreb_distance/1000
census_1895<-subset(census_1895, area!="*)")

census_1895$dist2 <-census_1895$dist1*census_1895$krajna6_distance
census_1895$bfe1 <- ifelse(census_1895$krajna6_NEAR_FID == 1, 1,0)
census_1895$bfe2 <- ifelse(census_1895$krajna6_NEAR_FID == 2, 1,0)
census_1895$bfe3 <- ifelse(census_1895$krajna6_NEAR_FID == 3, 1,0)
census_1895$bfe4 <- ifelse(census_1895$krajna6_NEAR_FID == 4, 1,0)
census_1895$bfe5 <- ifelse(census_1895$krajna6_NEAR_FID == 5, 1,0)
census_1895$bfe6 <- ifelse(census_1895$krajna6_NEAR_FID == 6, 1,0)
census_1895$bfe7 <- ifelse(census_1895$krajna6_NEAR_FID == 7, 1,0)
census_1895$bfe8 <- ifelse(census_1895$krajna6_NEAR_FID == 8, 1,0)

census_1895$POINT_X<-census_1895$lon
census_1895$POINT_Y<-census_1895$lat

census_1895$quad1<-ifelse((census_1895$lon > 18 & census_1895$lon< 20) & 
                       (census_1895$lat<47  & census_1895$lat>44), 1, 0)

census_1895$quad2<-ifelse((census_1895$lon > 16 & census_1895$lon< 18) & 
                       (census_1895$lat<47  & census_1895$lat>44), 1, 0)

census_1895$quad3<-ifelse((census_1895$lon > 14 & census_1895$lon< 16) & 
                       (census_1895$lat<47  & census_1895$lat>44), 1, 0)

#Creating an indicator variables for whether zadrugas exist
census_1895$zadruga_exist<-NA
census_1895$zadruga_exist[census_1895$existing_zadrugas_yokes!=0]<-1
census_1895$zadruga_exist[census_1895$existing_zadrugas_yokes==0]<-0

census_1895$log_existing_zadrugas_yokes<-log(census_1895$existing_zadrugas_yokes+1)
census_1895$prop_zadruga_from_total_area<-census_1895$existing_zadrugas_yokes/census_1895$total_area*100

census_1895$log_secretly_parted_zadrugas_yokes<-log(census_1895$secretly_parted_zadrugas_yokes + 1)
census_1895$prop_secretly_parted_zadrugas_from_total_area<-census_1895$secretly_parted_zadrugas_yokes/census_1895$total_area*100

census_1895$pop_zadrugas<-(census_1895$no_inhabitants*census_1895$existing_zadrugas_yokes)/census_1895$total_area
census_1895$pop_secret_zadrugas<-(census_1895$no_inhabitants*census_1895$secretly_parted_zadrugas_yokes)/census_1895$total_area
census_1895$log_pop_zadrugas<-log(census_1895$pop_zadrugas+1)

specifications<-list(c("treat", "POINT_X", "POINT_Y",
                       "zagreb_distance",
                       "bfe1", "bfe2", "bfe3", "bfe4",
                       "bfe5", 'bfe6', "bfe7",
                       "quad1", "quad2", "quad3"))

#Running the models
m_log_existing_zadrugas_yokes_list<-spatial_regression_meigen(census_1895, "log_existing_zadrugas_yokes", specifications[[1]], "unique_id_1985")
m_log_existing_zadrugas_yokes<-m_log_existing_zadrugas_yokes_list[[1]]
moran_i_log_existing_zadrugas_yokes<-m_log_existing_zadrugas_yokes_list[[2]]
random_se_log_existing_zadrugas_yokes<-m_log_existing_zadrugas_yokes_list[[3]]

m_prop_zadruga_from_total_area_list<-spatial_regression_meigen(census_1895, "prop_zadruga_from_total_area", specifications[[1]], "unique_id_1985")
m_prop_zadruga_from_total_area<-m_prop_zadruga_from_total_area_list[[1]]
moran_i_prop_zadruga_from_total_area<-m_prop_zadruga_from_total_area_list[[2]]
random_se_prop_zadruga_from_total_area<-m_prop_zadruga_from_total_area_list[[3]]

m_log_pop_zadrugas_list<-spatial_regression_meigen(census_1895, "log_pop_zadrugas", specifications[[1]], "unique_id_1985")
m_log_pop_zadrugas<-m_log_pop_zadrugas_list[[1]]
moran_i_log_pop_zadrugas<-m_log_pop_zadrugas_list[[2]]
random_se_log_pop_zadrugas<-m_log_pop_zadrugas_list[[3]]


#############
#FIRST GRAPH#
#############

relevant_vars<-c("log_existing_zadrugas_yokes",
                 "prop_zadruga_from_total_area",
                 "log_pop_zadrugas")


#####################
#Step2: MEANS AND SD#
#####################

means<-list()
sds<-list()
for (i in relevant_vars){
  item_mean<-round(mean(census_1895[[i]], na.rm=T), 3)
  item_sd<-round(sd(census_1895[[i]], na.rm=T), 3)
  means[[length(means) + 1]] <- item_mean 
  sds[[length(sds) + 1]] <- item_sd 
}
means<-unlist(means)
sds<-unlist(sds)


####################
#Step3: LIST MODELS#
####################

list_models<-list(m_log_existing_zadrugas_yokes,
                  m_prop_zadruga_from_total_area,
                  m_log_pop_zadrugas)

#######################
#Step4: Moran's I Test#
#######################

list_moran_i<-c(moran_i_log_existing_zadrugas_yokes,
                   moran_i_prop_zadruga_from_total_area,
                   moran_i_log_pop_zadrugas)


########################
#Step5: Moran random_se#
########################

list_random_se<-c(random_se_log_existing_zadrugas_yokes,
                     random_se_prop_zadruga_from_total_area,
                     random_se_log_pop_zadrugas)

#######################
#Step6: Column Labels#
######################

column_labels= c("\\shortstack{Log Area Covered\\\\ by Zadrugas\\\\ 1895}",
                 "\\shortstack{Pct. Area Covered\\\\ by Zadrugas/Total Area\\\\ 1895}",
                 "\\shortstack{Log Population\\\\ in Zadrugas\\\\ 1895}")


table3<-stargazer(list_models, 
                          object.names = F,
                          keep = c("treat"),
                          se = list(se_robust_cluster(m_log_existing_zadrugas_yokes, census_1895, "district"),
                                    se_robust_cluster(m_prop_zadruga_from_total_area, census_1895, "district"),
                                    se_robust_cluster(m_log_pop_zadrugas, census_1895, "district")),
                          column.labels= column_labels,
                          covariate.labels=c("Military Territory"),
                          dep.var.labels.include = F,
                          omit.stat=c("LL","ser","f", "rsq"), no.space=TRUE, float=FALSE,
                          add.lines=list(c("Mean", means),
                                         c("SD", sds),
                                         c("Boundary FE", rep("Yes", 14)),
                                         c("Region FE", rep("Yes", 14)),
                                         c("Moran eigenvectors", rep("Yes", 14)),
                                         c("Moran's I Test", list_moran_i),
                                         c("Moran Random SE",list_random_se)))
note_latex <- "\\multicolumn{4}{l} {\\parbox[t]{15cm}{ \\footnotesize \\textit{Notes:} 
Coefficients and county-clustered robust standard errors in parantheses from OLS regression. 
$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01.
The unit of analysis for all columns is village. See codebook in the appendix for data sources and how the variables
were calculated.}}"
table3 [grepl("Note", table3)] <- note_latex
cat(table3, file="./Dropbox/Legacies_Project/Paper/tables/table_3.tex", sep="\n")
